package org.modeshape.jcr.api.text;
import javax.jcr.Node;
import javax.jcr.Property;
import javax.jcr.RepositoryException;
/**
* Abstract base for components that extract text from binary content. Subclasses declare which MIME types they can handle via
* {@link #supportsMimeType(String)} and implement {@link #extractFrom(Binary, Output, Context)} to record the extracted text
* to the supplied {@link Output}.
*/
public abstract class TextExtractor {
...
/**
* Determine if this extractor is capable of processing content with the supplied MIME type.
*
* @param mimeType the MIME type; never <code>null</code>
* @return <code>true</code> if this extractor can process content with the supplied MIME type, or <code>false</code>
* otherwise.
*/
public abstract boolean supportsMimeType( String mimeType );
/**
* Extract text from the given {@link Binary}, using the given output to record the results.
*
* @param binary the binary value that can be used in the extraction process; never <code>null</code>
* @param output the output to which the extracted text should be recorded; never <code>null</code>
* @param context the context that provides additional information for the extraction operation; never <code>null</code>
* @throws Exception if there is a problem during the extraction process
*/
public abstract void extractFrom( Binary binary,
TextExtractor.Output output,
Context context ) throws Exception;
/**
* Allows subclasses to process the stream of a binary value in a "safe" fashion, making sure the stream is closed at the
* end of the operation.
*
* @param binary a {@link org.modeshape.jcr.api.Binary} which is expected to contain a non-null binary value.
* @param operation a {@link org.modeshape.jcr.api.text.TextExtractor.BinaryOperation} which should work with the stream
* @param <T> the return type of the binary operation
* @return whatever type of result the stream operation returns
* @throws Exception if there is an error processing the stream
*/
protected final <T> T processStream( Binary binary,
BinaryOperation<T> operation ) throws Exception {
...
}
/**
* Interface which can be used by subclasses to process the input stream of a binary property. Instances are handed to
* {@link #processStream(Binary, BinaryOperation)}.
*
* @param <T> the return type of the binary operation
*/
protected interface BinaryOperation<T> {
/**
* Perform the operation on the supplied stream and return its result.
* <p>
* NOTE(review): {@link #processStream(Binary, BinaryOperation)} is documented as closing the stream once the operation
* ends, so implementations presumably should not close it themselves - confirm against the implementation.
* </p>
*
* @param stream the input stream to read the binary content from
* @return the result of the operation
* @throws Exception if there is an error processing the stream
*/
T execute( InputStream stream ) throws Exception;
}
/**
* Interface which provides additional information to the text extractors, during the extraction operation.
*/
public interface Context {
/**
* Determine the MIME type of the supplied binary value.
* <p>
* NOTE(review): the exact role of <code>name</code> (presumably a file or property name used as a detection hint) and
* whether either argument or the result may be <code>null</code> are not visible here - confirm against the
* implementations.
* </p>
*
* @param name the name associated with the binary value
* @param binaryValue the binary value whose MIME type is to be determined
* @return the MIME type of the binary value
* @throws RepositoryException if there is a problem accessing the repository content
* @throws IOException if there is a problem reading the binary content
*/
String mimeTypeOf( String name,
Binary binaryValue ) throws RepositoryException, IOException;
}
/**
* The interface passed to a TextExtractor to which the extractor should record all text content.
*/
public interface Output {
/**
* Record the text as being extracted. This method can be called multiple times during a single extract.
*
* @param text the text extracted from the content.
*/
void recordText( String text );
}
}